library(conflicted)
library(tidyverse)
library(scales)
library(ggrepel)
library(gghighlight)
library(mapSpain)
library(ESdata)
conflicts_prefer(
dplyr::filter,
dplyr::select,
dplyr::lag,
plotly::layout
)Spain Occupational Accident Analysis (SODA)
Business Analytics Project
Introduction
This report covers the analysis of occupational accidents, also known as work accidents, in Spain
Data Exploration
Work Accident Data
load("../data/atr_clean.RData")
atr |>
filter(sector == "Total",
ccaa == "ES") |>
ggplot(aes(date, accidents)) +
geom_line() +
geom_point()atr |>
filter(ccaa == "ES") |>
ggplot(aes(x = date, y = accidents, color = sector)) +
geom_line() +
geom_point(size=2) +
labs(x = "Year",
y = "Work accidents per 100k workers",
title = "Work accidents in Spain from 2009 to 2021")atr |>
filter(ccaa == "ES") |>
ggplot(aes(x = date, y = accidents, color = sector)) +
geom_line() +
geom_point() +
gghighlight(sector != "total", label_key = sector, use_group_by = FALSE) +
facet_wrap(~sector)atr |>
filter(sector == "Total",
year == 2021) |>
ggplot(aes(x = accidents, y = fct_reorder(ccaa_name, accidents))) +
geom_col() +
labs(x = "Work accidents per 100k workers",
y = NULL,
title = "Work accidents in 2021 per autonomous community")Add year over year change
atr <- atr |>
mutate(acc_yoy = (accidents - lag(accidents)) / lag(accidents),
.by = c(sector, ccaa))atr |>
filter(ccaa == "ES") |>
drop_na() |>
ggplot(aes(x = date, y = acc_yoy, color = sector)) +
geom_hline(yintercept = 0, linetype = "dashed", color = "gray50") +
geom_line() +
geom_point() +
scale_y_continuous(labels = label_percent()) +
labs(x = "Year",
y = "Year over year change",
title = "Work accidents in Spain from 2009 to 2021")atr |>
filter(ccaa == "ES") |>
drop_na() |>
ggplot(aes(x = date, y = acc_yoy, fill = sector)) +
geom_col() +
facet_wrap(~sector) +
scale_y_continuous(labels = label_percent()) +
labs(x = "Year",
y = "Year over year change",
title = "Work accidents in Spain from 2009 to 2021") +
theme(legend.position = c(1, 0),
legend.justification = c(1, 0))Working population
work_pop <- epa_sector |>
filter(edad == "total",
sexo == "total") |>
select(-edad, -sexo)
work_pop |>
filter(region == "ES") |>
ggplot(aes(x = periodo, y = valor, color = sector)) +
geom_line() +
geom_point() +
labs(x = "Year", y = "100k workers",
title = "Active working population in Spain")Working population per sector
# Add relative work pop per sector
work_pop <- work_pop |>
mutate(rel = valor / valor[sector == "total"],
.by = c(periodo, region))
work_pop |>
# Add full AC names
left_join(ccaa_iso, by = join_by(region == iso)) |>
filter(periodo == "2023-01-31",
sector != "total",
!region %in% c("ES-ML", "ES-CE")) |>
ggplot(aes(rel, fct_reorder(nombres, rel), fill = sector)) +
geom_col() +
scale_x_continuous(labels = label_percent()) +
labs(x = "Percentage", y = NULL,
title = "Relative working population per sector")Non-EU-working population
work_noEU <- epa_nac |>
# Only view all sexes and working population
filter(sexo == "total",
dato == "act") |>
select(-sexo, -dato) |>
# Compute relative amount of workforce per nationality
mutate(rel = valor / valor[nac == "total"],
.by = c(region, periodo)) |>
# Only keep no-UE
filter(nac == "no-UE") |>
select(-valor, -nac) |>
rename(noEU_rel = rel)
work_noEU |> head()Plot non-EU workforce per AC:
work_noEU |>
filter(periodo == "2023-01-31") |>
# Add full AC names
left_join(ccaa_iso, by = join_by(region == iso)) |>
ggplot(aes(x = noEU_rel, y = fct_reorder(nombres, noEU_rel))) +
geom_col() +
scale_x_continuous(labels = label_percent()) +
labs(x = "Percentage",
y = NULL,
title = "Non-EU working population in 2023 per autonomous community")GDP
agr = c("vab_A", "vab_BE", "vab_F", "vab_GT", "pib")
agr_rename = c(
"agricultura" = "vab_A",
"industria" = "vab_BE",
"construccion" = "vab_F",
"servicios" = "vab_GT",
"total" = "pib"
)
gdp <- pib_pm_oferta |>
filter(agregado %in% agr,
tipo == "indice",
dato == "base",
ajuste == "ajustado") |>
mutate(month = month(periodo), year = year(periodo)) |>
filter(month == 6) |>
# Remove identical elements
distinct(across(periodo:dato), .keep_all = TRUE) |>
select(periodo, agregado, valor) |>
rename(date = periodo, sector = agregado, gdp = valor) |>
mutate(sector = fct_recode(sector, !!!agr_rename))
gdp |>
ggplot(aes(date, gdp, color = sector)) +
geom_line()Inflation
infl <- ipc_ccaa |>
# Only extract annual inflation and price index over all groups
filter(grupo == "general",
dato %in% c("anual", "indice")) |>
select(-grupo) |>
pivot_wider(names_from = dato, values_from = valor) |>
rename(infl_cpi = indice, infl_ann = anual) |>
# Annual inflation in decimal format
mutate(infl_ann = infl_ann / 100)
infl |> head()Map Plots
# Get Spanish ACs as simple features
ccaa <- esp_get_ccaa()
can_box <- esp_get_can_box()
atr_2021 <- atr |>
filter(year == 2021,
ccaa != "ES",
sector == "Total") |>
mutate(ccaa = as.character(ccaa))
# Join SF and work accidents data
ccaa_atr <- ccaa |>
inner_join(atr_2021, by = join_by(iso2.ccaa.code == ccaa))
ggplot(ccaa_atr) +
# Plot ACs
geom_sf(aes(fill = accidents),
color = "grey70",
linewidth = .3) +
# Plot canaries box
geom_sf(data = can_box, color = "grey70") +
# Plot labels with accident number
geom_label_repel(
aes(label = round(accidents), geometry = geometry),
stat = "sf_coordinates",
fill = alpha(c("white"), 0.5),
color = "black",
size = 3,
label.size = 0
) +
# Adjust color scale
scale_fill_distiller(palette = "Blues", direction = 1) +
theme_void() +
theme(legend.position = c(0.12, 0.6)) +
labs(x = NULL,
y = NULL,
title = "Work accidents in 2021 per autonomous community",
fill = "Accidents per\n100k workers")library(plotly)
library(sf)
ccaa <- esp_get_ccaa() |> st_cast("MULTIPOLYGON")
ccaa_atr <- ccaa |>
inner_join(atr_2021, by = join_by(iso2.ccaa.code == ccaa))
p <- ggplot(ccaa_atr) +
geom_sf(aes(fill = accidents,
text = paste0(ccaa_name, ":\n", round(accidents))),
color = "grey70",
linewidth = .3) +
geom_sf(data = can_box, color = "grey70") +
scale_fill_distiller(palette = "Blues", direction = 1) +
theme_void() +
labs(x = NULL,
y = NULL,
title = "Work accidents in 2021 per autonomous community",
fill = "Accidents per\n100k workers")
ggplotly(p, tooltip = "text") |> style(hoveron = "fill")